# -*- coding: utf-8 -*-
# @Date    : 2021/4/24
# @Author  : Maoxian
import os
import time
from urllib.parse import urlsplit

import requests
from lxml import etree

# Browser-like User-Agent sent with every request so the target site does not
# reject the scraper as an obvious script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36 Edg/90.0.818.46"
}


def get_base_url(url):
    """Return the scheme and host portion of *url*, e.g. 'http://example.com'.

    Uses urllib.parse.urlsplit instead of manual string splitting, which is
    more robust (ports, userinfo, and path-less URLs are handled correctly).
    """
    parts = urlsplit(url)
    return f'{parts.scheme}://{parts.netloc}'


def get_novel_info(url):
    """Fetch the novel's index page and return its metadata.

    Returns a dict with keys: name, author, type, status, latest_time and
    profile, scraped from the page's "info" block via XPath.
    """
    # BUG FIX: requests.get(url, headers) passed the headers dict as the
    # positional `params` argument (i.e. as a query string); it must be
    # the `headers=` keyword for the User-Agent to be sent.
    r = requests.get(url, headers=headers)
    novel_html = etree.HTML(r.content.decode('utf8'))

    return dict(
        name=novel_html.xpath('//div[@class="info"]/div[1]/h1/text()')[0],  # book title
        author=novel_html.xpath('//div[@class="info"]/div[1]/div/p[1]/text()')[0],  # author
        type=novel_html.xpath('//div[@class="info"]/div[1]/div/p[2]/text()')[0],  # genre
        status=novel_html.xpath('//div[@class="info"]/div[1]/div/p[3]/text()')[0],  # serialization status
        latest_time=novel_html.xpath('//div[@class="info"]/div[1]/div/p[5]/text()')[0],  # last update time
        profile=novel_html.xpath('//div[@class="info"]/div[2]/text()')[0],  # synopsis
    )


def get_chapter_url(url):
    """Collect every chapter's title and relative URL, following pagination.

    Starts at the table-of-contents page *url*, scrapes titles/hrefs, and
    follows the "next page" link until none remains. Returns an iterator of
    (title, relative_url) tuples.
    """
    # BUG FIX: headers must be passed as the `headers=` keyword; the original
    # positional call sent it as the `params` query string instead.
    r = requests.get(url, headers=headers)
    novel_html = etree.HTML(r.content.decode('utf8'))
    titles = []
    urls = []
    next_url = True  # sentinel so the loop body runs at least once
    while next_url:
        titles += novel_html.xpath('//div[@class="layout layout-col1"]/div[2]/ul/li/a/text()')
        urls += novel_html.xpath('//div[@class="layout layout-col1"]/div[2]/ul/li//a/@href')
        # Relative href of the "next page" link; an empty list ends the loop.
        next_url = novel_html.xpath('//div[@class="listpage"]/span[3]/a/@href')
        if next_url:
            # BUG FIX: the pagination request previously sent no headers at
            # all; include the same User-Agent as every other request.
            r = requests.get(get_base_url(url) + next_url[0], headers=headers)
            novel_html = etree.HTML(r.content.decode('utf8'))

    return zip(titles, urls)


def get_chapter_content(url):
    """Fetch one chapter page and return a (title, body_text) tuple.

    The body is assembled from the page's ``#content`` text nodes, each
    stripped of surrounding whitespace and joined with newlines.
    """
    # BUG FIX: headers must be passed as the `headers=` keyword; positionally
    # it becomes the `params` query string and the User-Agent is never sent.
    r = requests.get(url, headers=headers)
    tree = etree.HTML(r.content.decode('utf8'))

    title = tree.xpath('//h1[@class="title"]/text()')[0]
    content = tree.xpath('//*[@id="content"]/text()')
    content = '\n'.join(line.strip() for line in content)
    return title, content


def download_content(path, content):
    """Save one chapter to *path* as UTF-8 text.

    ``content`` is a (title, body) pair; the title becomes the first line of
    the file, followed by the body. Sleeps briefly afterwards so successive
    chapter downloads don't hammer the server.
    """
    title, body = content[0], content[1]
    with open(path, 'w', encoding='utf8') as out_file:
        out_file.write(f"{title}\n{body}")
    # polite delay between downloads
    time.sleep(0.5)


def main(url):
    """Download an entire novel given its table-of-contents page *url*.

    Prints the novel's metadata, creates a directory named after the novel,
    then fetches and saves every chapter as ``<name>/<chapter title>.txt``.
    """
    # 1. Derive the site root from the index URL.
    # BUG FIX: the function previously ignored its `url` parameter and read
    # the global `novel_url` instead (here and below), which broke any caller
    # other than the __main__ block.
    base_url = get_base_url(url)

    # 2. Fetch the novel's metadata and print it.
    novel_info = get_novel_info(url)
    for key in ('name', 'author', 'type', 'status', 'latest_time', 'profile'):
        print(novel_info[key])

    # Create the output directory for the chapter files (no-op if it exists).
    os.makedirs(novel_info['name'], exist_ok=True)

    # 3. Iterate over every chapter's title and relative URL.
    # BUG FIX: the loop variable was named `url`, shadowing the parameter.
    for title, chapter_url in get_chapter_url(url):
        try:
            print(f"====  开始下载： {title} {chapter_url}  ====")
            # 4. Fetch one chapter's (title, body).
            content = get_chapter_content(base_url + chapter_url)
            # 5. Write it to <novel name>/<chapter title>.txt
            path = f'{novel_info["name"]}/{title}.txt'
            download_content(path, content)
            print(f"====  下载完成： {title}  ====")
        except Exception:
            # Best effort: report the failed chapter and keep downloading.
            print(f'====   下载失败：{title} {chapter_url}  ====')
    print(f"====  小说{novel_info['name']}下载完成  ====")


if __name__ == '__main__':
    # Prompt the user for a novel's table-of-contents page URL and download it.
    # Example input:
    # novel_url = "http://www.gxlztc.net/book/37878/index_1.html"
    novel_url = input("请输出一个小说的目录页地址：")
    main(novel_url)
